# coding: utf-8

import demo_common
from gensim import models


def get_lsi_model(corpus, dictionary, num_topics):
    """Construct a gensim ``LsiModel`` for the given corpus.

    :param corpus: forwarded to ``LsiModel`` as its ``corpus`` argument.
    :param dictionary: forwarded to ``LsiModel`` as ``id2word``.
    :param num_topics: forwarded to ``LsiModel`` as ``num_topics``.
    :return: the newly created ``models.LsiModel`` instance.
    """
    lsi_kwargs = {
        'corpus': corpus,
        'id2word': dictionary,
        'num_topics': num_topics,
    }
    return models.LsiModel(**lsi_kwargs)


if __name__ == '__main__':
    # Demo driver: build one LSI model on the corpus transformed by the model
    # from get_tfidf_model and one on the corpus from get_bow_corpus, printing
    # each model's topics right after it is built.
    topic_count = 10

    train_set = demo_common.get_train_set()
    print('train_set len', len(train_set))

    dictionary = demo_common.get_dictionary(train_set)
    corpus = demo_common.get_bow_corpus(train_set=train_set, dictionary=dictionary)
    tfidf = demo_common.get_tfidf_model(corpus=corpus, dictionary=dictionary)

    # Order determines the printed output: transformed corpus first, then the
    # untransformed one.
    for lsi_input in (tfidf[corpus], corpus):
        lsi = get_lsi_model(
            corpus=lsi_input,
            dictionary=dictionary,
            num_topics=topic_count,
        )
        for topic in lsi.show_topics():
            print(topic)
